
# unpaired t-test as a regression model and difference between two regression models

library(foreign)
# Read the SPSS data set for the two-group comparison. read.spss() returns a
# list, so it is converted to a data frame before modelling.
x <- read.spss("U:\\MY DOCUMENTS\\GLM B Subs.sav")
x1 <- data.frame(x)
x1

# Every model in this script receives its data frame through `data =` rather
# than attach(): attach() stacks data frames on the search path, so a later
# section can silently pick up variables from an earlier data set.
t.test(crime ~ location, data = x1, var.equal = TRUE)

# Same test as a comparison of two nested regression models: intercept only
# versus intercept + location. The F statistic equals t^2 from the
# pooled-variance t-test and the p-values are identical.
anova(lm(crime ~ 1, data = x1), lm(crime ~ location, data = x1))

# Multiple regression as a difference between two regression models

x <- read.spss("U:\\MY DOCUMENTS\\GLM M REG.sav")
x1 <- data.frame(x)
x1

# The three nested models are fitted once and reused in the comparisons below.
fit_null <- lm(EFT ~ 1, data = x1)
fit_nart <- lm(EFT ~ NART, data = x1)
fit_full <- lm(EFT ~ NART + BLOCK, data = x1)

fit_full
anova(fit_full)
anova(fit_null, fit_full)

# Agreement between comparing nested models and the single model fit above:
# * SS for NART and BLOCK (in the single lm) = 3682.2 + 3005.3 = 6687.5
#   = change in residual SS when comparing the two lms; df = 11 - 9 = 2.
# * The error mean square for testing whether NART and BLOCK jointly have
#   zero correlation with EFT = 4275.4 / 9 = 475 in both approaches above.

anova(fit_nart, fit_full)

# Same result as the sequential F test of BLOCK adjusted for NART:
# (7280.7 - 4275.4) / (4275.4 / 9) = 6.33, p = 0.033.

# General point: you can read data into R in a variety of formats, including
# tab-separated text as below. Such a file can be saved from Excel.

# sep = "\t" because the file is tab separated and the location labels contain
# spaces, which the default whitespace splitting would break into extra fields.
# header = TRUE assumes the first row holds the column names (the models refer
# to `crime` and `location` by name) -- TODO confirm against the file.
yy <- read.table(
  "U:\\My Documents\\GLM B subs DAT.txt",
  header = TRUE,
  sep = "\t"
)
# Use the data just read from the text file, not the stale SPSS object `x`.
x1 <- yy
x1
t.test(crime ~ location, data = x1, var.equal = TRUE)
anova(lm(crime ~ 1, data = x1), lm(crime ~ location, data = x1))

# >>>>>>>>>>>>>>>>> OUTPUT (pasted R console transcript follows; for reference only, not runnable code)


> library(foreign)
> x <- read.spss("U:\\MY DOCUMENTS\\GLM B Subs.sav")
> x1 <- data.frame(x)
> x1
   crime age educatio            location LOC  age_c INTERCEPT
1   79.1 151       91  southern US States   1  12.43         1
2  163.5 143      113 northern  US states  -1   4.43         1
3   57.8 142       89  southern US States   1   3.43         1
4  196.9 136      121 northern  US states  -1  -2.57         1
5  123.4 141      121 northern  US states  -1   2.43         1
6   68.2 121      110 northern  US states  -1 -17.57         1
7   96.3 127      111  southern US States   1 -11.57         1
8  155.5 131      109  southern US States   1  -7.57         1
9   85.6 157       90  southern US States   1  18.43         1
10  70.5 140      118 northern  US states  -1   1.43         1
11 167.4 124      105 northern  US states  -1 -14.57         1
12  84.9 134      108 northern  US states  -1  -4.57         1
13  51.1 128      113 northern  US states  -1 -10.57         1
14  66.4 135      117 northern  US states  -1  -3.57         1
15  79.8 152       87  southern US States   1  13.43         1
16  94.6 142       88  southern US States   1   3.43         1
17  53.9 143      110 northern  US states  -1   4.43         1
18  92.9 135      104  southern US States   1  -3.57         1
19  75.0 130      116 northern  US states  -1  -8.57         1
20 122.5 125      108 northern  US states  -1 -13.57         1
21  74.2 126      108 northern  US states  -1 -12.57         1
22  43.9 157       89  southern US States   1  18.43         1
23 121.6 132       96 northern  US states  -1  -6.57         1
24  96.8 131      116 northern  US states  -1  -7.57         1
25  52.3 130      116 northern  US states  -1  -8.57         1
26 199.3 131      121 northern  US states  -1  -7.57         1
27  34.2 135      109 northern  US states  -1  -3.57         1
28 121.6 152      112 northern  US states  -1  13.43         1
29 104.3 119      107 northern  US states  -1 -19.57         1
30  69.6 166       89  southern US States   1  27.43         1
31  37.3 140       93 northern  US states  -1   1.43         1
32  75.4 125      109 northern  US states  -1 -13.57         1
33 107.2 147      104  southern US States   1   8.43         1
34  92.3 126      118 northern  US states  -1 -12.57         1
35  65.3 123      102 northern  US states  -1 -15.57         1
36 127.2 150      100 northern  US states  -1  11.43         1
37  83.1 177       87  southern US States   1  38.43         1
38  56.6 133      104 northern  US states  -1  -5.57         1
39  82.6 149       88  southern US States   1  10.43         1
40 115.1 145      104  southern US States   1   6.43         1
41  88.0 148      122 northern  US states  -1   9.43         1
42  54.2 141      109 northern  US states  -1   2.43         1
43  82.3 162       99  southern US States   1  23.43         1
44 103.0 136      121 northern  US states  -1  -2.57         1
45  45.5 139       88  southern US States   1   0.43         1
46  50.8 126      104 northern  US states  -1 -12.57         1
47  84.9 130      121 northern  US states  -1  -8.57         1
> attach(x1,2)
> t.test(crime ~ location, data = x1,var.equal=T)

        Two Sample t-test

data:  crime by location
t = 0.61052, df = 45, p-value = 0.5446
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -16.82562  31.46312
sample estimates:
mean in group northern  US states  mean in group southern US States 
                         93.00000                          85.68125 

> anova(lm(crime ~ 1),lm(crime ~ location))
Analysis of Variance Table

Model 1: crime ~ 1
Model 2: crime ~ location
  Res.Df   RSS Df Sum of Sq      F Pr(>F)
1     46 68809                           
2     45 68244  1    565.27 0.3727 0.5446
> 
> # Multiple regression as a difference between two regression models
> 
> x <- read.spss("U:\\MY DOCUMENTS\\GLM M REG.sav")
> x1 <- data.frame(x)
> x1
   EFT NART BLOCK INTERCEPT VAR00005  Y COV INTERCPT GROUP COV_GRP
1   59   90   317         1       NA  5   9        1     1       9
2   33  101   464         1       NA  7   3        1     1       3
3   49   95   525         1       NA  2   7        1     1       7
4   69  110   298         1       NA  3   2        1     1       2
5   65  108   491         1       NA  4   8        1     1       8
6   26   92   196         1       NA  8   2        1     0       0
7   29  112   268         1       NA  7   8        1     0       0
8   62   89   372         1       NA  6   3        1     0       0
9   31   94   370         1       NA  5   8        1     0       0
10 139  118   739         1       NA  6   1        1     0       0
11  74  110   430         1       NA NA  NA       NA    NA      NA
12  31   94   410         1       NA NA  NA       NA    NA      NA
> attach(x1,2)
The following object is masked from x1 (pos = 3):

    INTERCEPT

> lm(EFT ~ NART+BLOCK)

Call:
lm(formula = EFT ~ NART + BLOCK)

Coefficients:
(Intercept)         NART        BLOCK  
  -101.5148       1.0340       0.1293  

> anova(lm(EFT ~ NART+BLOCK))
Analysis of Variance Table

Response: EFT
          Df Sum Sq Mean Sq F value  Pr(>F)  
NART       1 3682.2  3682.2  7.7513 0.02126 *
BLOCK      1 3005.3  3005.3  6.3262 0.03303 *
Residuals  9 4275.4   475.0                  
---
Signif. codes:  0 *** 0.001 ** 0.01 * 0.05 . 0.1   1
> anova(lm(EFT ~ 1),lm(EFT ~ NART+BLOCK))
Analysis of Variance Table

Model 1: EFT ~ 1
Model 2: EFT ~ NART + BLOCK
  Res.Df     RSS Df Sum of Sq      F  Pr(>F)  
1     11 10962.9                              
2      9  4275.4  2    6687.5 7.0388 0.01445 *
---
Signif. codes:  0 *** 0.001 ** 0.01 * 0.05 . 0.1   1
> anova(lm(EFT ~ NART),lm(EFT ~ NART+BLOCK))
Analysis of Variance Table

Model 1: EFT ~ NART
Model 2: EFT ~ NART + BLOCK
  Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
1     10 7280.7                              
2      9 4275.4  1    3005.3 6.3262 0.03303 *
---
Signif. codes:  0 *** 0.001 ** 0.01 * 0.05 . 0.1   1
> 
